/*

This do-file generates variables to be used in the analysis of the Malawi student incentive project
 
Last version: Hyuk Son, 07/22/2021
 
uses:
	y1_raw.dta
 
saves:
	merged_y1y2.dta
 
Contents
*
*             1 Generate various useful variables
*					1.1 	School-grade and Zones	
*					1.2 	Randomization dummies
*								1.2.1 Year1 1st stage randomization variable 
*								1.2.2 Year1 2nd stage randomization variable
*								1.2.3 1st stage x 2nd stage	
*					1.3 	Participation
*								1.3.1 Year1 Exam participation
*								1.3.2 Year1 Sample selection
*
*             2 Exam results 
*					2.1		Standardized Scores
*								2.1.1 Year 1 Baseline exam
*                           	2.1.2 Year 1 Mid-term exam
*                          		2.1.3 Year 1 Final exam
*					2.2		Rank
*                  				2.2.1 Year 1 Baseline exam
*                   			2.2.2 Year 1 Mid-term exam
*                           	2.2.3 Year 1 Final exam
*					2.3		Subgroup
*                 	         	 2.3.1 Year 1
*			 		2.4 Subgroup rank		 
* 								 2.4.1 Year1 Mid-term exam
* 								 2.4.2 Year1 Final exam 
*			 		2.5 Score increase		 
*								 2.5.1 Year1 
*
*             3 Intermediate outcomes
*                   3.1 Understanding and expectation of scholarship
* 								 3.1.1 Before the randomization result announcement
*								 3.1.2 After the randomization result announcement (Followup survey)
*                   3.2 Student investment
*								 3.2.1 Attendance
*								 3.2.2 Study hours
*								 3.2.3 Homework completion
*                   3.3	Cognitive abilities
*								 3.3.1 Raven test
*								 3.3.2 Computation score
*					3.4	Non-cognitive abilities
*					3.5	Student motivation
*					3.6	Teacher and Parental response
*								 3.6.1 Teacher response
*								 3.6.2 Parental effort
*								 3.6.3 Parents' mentioning the scholarship to the students (f1_q503)
*
*			  4 Intermediate outcome variables baseline
*                   4.1 Student investment
*								 4.1.1 Study hours
*								 4.1.2 Homework completion
*                   4.2	Cognitive abilities
*								 4.2.1 Raven test
*					4.3	Non-cognitive abilities
*					4.4	Student motivation
*					4.5	Teacher and Parental response
*								 4.5.1 Teacher response
*								 4.5.2 Parental effort
*
*             5 Demographics/Assets
*					5.1 Baseline
*								 5.1.1 Demographics
*								 5.1.2 Assets
*
*					5.2 Follow-up
*								 5.2.1 Demographics
*
*             6 Quantile and dummy variables by rank using exam score
* 					6.1 Quantiles		 
*			 		6.2 Dummy variables using percentage rank			
* 							6.2.1 Baseline top & bottom 85% 
* 							6.2.2 Mid-term top & bottom 85% 
* 							6.2.3 Mid-term subgroup top 15% dummy
*
*             7 Interactions for Heterogeneity Analysis
*					7.1 Interaction with top15% dummy
* 							7.1.1 Baseline exam
*							7.1.2 Mid-term exam
*					7.2 Interaction with Subgroup top15% dummy
*							7.2.1 Baseline exam
*							7.2.2 Mid-term exam
*					7.3 Interaction with baseline exam score
*					7.4 Interaction with improvement dummy
*					7.5 Interaction of tutored students and both group	(Year2)
*
*			  8 Scholarship awardee selection
*			 		8.1 Students who meet the selection criteria
* 							8.1.1 Top 15% (merit based scholarship condition)
*							8.1.2 Subgroup rank higher than 15 (relative merit based scholarship condition)
* 					8.2 Actual recipients
*							8.2.1 Year1
*
*			  Year 2
*  			  9 Setting
*
* 					9.1 Merge raw data of year1 and year2
*			 		9.2 Project year variable
*
*  			  10 Generate various useful variables						
*			 		10.1 School-grade and Zones
*							10.1.1 Cluster
* 							10.1.2 Zone
*			 		10.2 Randomization group
*			 		10.3 Participation		
*
*			  11 Exam result					
*			 		11.1 Standardized score
*							11.1.1 Score
*		 					11.1.2 Score change
*					11.2 Rank		 
* 							11.2.1 Year2 Baseline exam
* 							11.2.2 Year2 Final exam	
* 					11.3 Subgroup		 
* 					11.4 Subgroup rank		 
* 							11.4.1 Baseline exam 
* 							11.4.2 Final exam 
*			 		11.5 Scholarship selection		 
*					
*			  12 Tutor variables
*				  	12.1 Tutor dummy 		
* 				 	12.2 Number of students for each tutoring class 
*			 		12.3 Interaction of tutored students and both group	(Year2)	
*
*  			  13 Intermediate outcomes
*					13.1 Understanding and expectation of scholarship 
*
*   		  14 Demographic characteristics
*
* 			  15 School characteristics
*			 		15.1 Class size		 
* 					15.2 School size		 
*
*			  16 Ordering variables 
*
*  			  17 Labeling variables
*
*			  18 Compress and save data
*/

********************************************************************************************************************
/*
  					0	Set up
*/
********************************************************************************************************************
* Reset the Stata session and turn off output paging so the script runs unattended
clear all
set more off

* Run programs.do from the folder stored in the global macro do_path
* (presumably defines the genindex command used further down -- TODO confirm)
include "$do_path/programs.do"
****************************************************
/* 		0.3 Read in data		 */
****************************************************
	* Load the year-1 raw data from the folder stored in the global macro raw_path
	use "${raw_path}/y1_raw.dta", clear

********************************************************************************************************************
/*
  					1.	Generate various useful variables						
*/
********************************************************************************************************************

****************************************************
/* 		1.1 School-grade and Zones		 */
****************************************************
  * 1.1.1 Cluster
	* id is converted to a string and split by position:
	* characters 1-2 -> school number, character 3 -> standard, character 4 -> class
	tostring id, replace
	gen id1 = substr(id, 1, 2)
	gen id2 = substr(id, 3, 1)
	gen id3 = substr(id, 4, 1)
	destring id1 id2 id3, replace
	replace y1_schoolno = id1
	replace y1_standard = id2
	replace y1_class    = id3
	drop id1 id2 id3

	* School-by-standard identifier: concatenate both codes, blank out the
	* all-missing case (".."), then convert the result back to a number
	egen y1_school_class = concat(y1_schoolno y1_standard)
	replace y1_school_class = "" if y1_school_class == ".."
	destring y1_school_class, replace

  * 1.1.2 Zone
	* Zone 1, 2 or 3 according to the school-number band; missing outside all bands
	gen y1_zone = cond(y1_schoolno>10 & y1_schoolno<22, 1, cond(y1_schoolno>21 & y1_schoolno<33, 2, cond(y1_schoolno>32 & y1_schoolno<42, 3, .)))

  * 1.1.3 Grade dummies
	* One 0/1 indicator per standard 5 to 8 (0 when y1_standard is missing)
	forvalues g = 5/8 {
		gen y1_standard`g' = (y1_standard == `g')
	}
	
****************************************************
/* 		1.2 Randomization dummies		 */
****************************************************
 * 1.2.1 Year1 1st stage randomization variable
	* y1_rand codes used below: 1 = merit, 2 = relative, 3 = control
	rename rct_edit y1_rand
	gen merit    = (y1_rand == 1)
	gen relative = (y1_rand == 2)
	gen control  = (y1_rand == 3)

 * 1.2.2 Year1 2nd stage randomization variable
	* feedback is set to missing for standard 8
	rename rct_stg2 feedback
	replace feedback = . if y1_standard == 8

 * 1.2.3 Year1 1st stage x 2nd stage
	* Arm x feedback indicator: 1 when the product equals 1, 0 otherwise,
	* and missing when either factor is missing
	foreach arm in merit relative {
		gen `arm'feed = (`arm'*feedback == 1) if `arm' != . & feedback != .
	}

	order merit-relativefeed, a(y1_rand)

	
****************************************************
/* 		1.3 Participation		 */
****************************************************
 * 1.3.1 Year1 Exam participation
	* Baseline: total raw score lies in [0, 48]; mid-term and final: total raw score is not missing
	gen b1_e_p = inrange(b1_r_total, 0, 48)
	gen m1_e_p = (m1_r_total != .)
	gen f1_e_p = (f1_r_total != .)

 * 1.3.2 Year1 Sample selection
	* y1_sub = 1 when the student sat the baseline exam and y1_blq equals 1, else 0
	gen y1_sub = (b1_e_p == 1 & y1_blq == 1)
	order y1_sub, a(y1_blq)

	
********************************************************************************************************************
/*
  					2. Exam results
*/
********************************************************************************************************************

****************************************************
/* 		2.1.Standardized score		 */
****************************************************

 * 2.1.1. Year1 Baseline and mid-term exam
 * Prepares the two score files as temporary datasets, merges them onto the main data
 * by id, and re-standardizes the baseline total score within standard.
* y1_bin is dropped here and comes back from the unit column of score_baseline.dta via the merge
drop y1_bin
* Keep the main data safe while the score files are loaded and cleaned
preserve
	use "${raw_path}/score_baseline.dta", clear 	// baseline (original)
	rename (ID Math Both Rank Rank_sub Percentage unit) (id b1_z_math b1_z_total b1_rank b1_subrank b1_pct_rev y1_bin)
	* Blank out "ab" entries so the column can be converted to numeric (presumably "ab" marks absent students -- TODO confirm)
	foreach v of varlist b1_z_total {
		replace `v'="" if `v'=="ab"
	}
	drop if id==.
	qui destring _all, replace
	* id is stored as a string so that it matches the string id in the main data
	tostring id, replace
	* Flags rows that come from the baseline score file
	gen y1_register = 1
	tempfile compare
	qui compress
	save `compare', replace


	use "${raw_path}/score_midline", clear // midterm (original)
	rename (ID Total Rank Percentage Rank_sub) (id m1_z_total m1_rank m1_pct_rev m1_subrank)
	replace m1_z_total="" if m1_z_total=="ab"
	destring m1_z_total m1_subrank, replace
	drop if id==.
	tostring id, replace
	tempfile midterm
	save `midterm', replace
	
restore

	* One-to-one merges on id; nogen suppresses the _merge variable
	merge 1:1 id using `compare', nogen
	merge 1:1 id using `midterm', nogen

	* The existing b1_subpct is removed; it is regenerated from b1_subrank below
 	drop b1_subpct
	
	* Keep the merged baseline total under a temporary name, then rebuild b1_z_total
	ren b1_z_total b1ztotaltest
	* Mean and sd are computed within standard over control students in the y1_sub sample only
	tempvar b1mean
	bysort y1_standard: egen `b1mean' = mean(b1ztotaltest) if y1_sub==1&control==1
	tempvar b1sd
	bysort y1_standard: egen `b1sd' = sd(b1ztotaltest) if y1_sub==1&control==1

	* max() by standard copies those control-group statistics to every row of the standard
	tempvar b1mean2 
	egen `b1mean2' = max(`b1mean'), by(y1_standard)
	tempvar b1sd2
	egen `b1sd2' = max(`b1sd'), by(y1_standard)

	* Standardized baseline score, defined only for baseline exam participants
	bysort y1_standard: gen b1_z_total = (b1ztotaltest-`b1mean2')/`b1sd2' if b1_e_p==1
 
	* Percentile-type variables derived from the rank columns of the score files
	gen b1_subpct = 100-b1_subrank
	gen b1_pct = 100*(1-b1_pct_rev)
	gen m1_subpct = 100-m1_subrank
	gen m1_pct = 100*(1-m1_pct_rev)

 * 2.1.3. Year1 Final exam
 * Step 1: z-score every subject (and the raw total) within standard among final exam participants
	foreach v of varlist f1_r_chi f1_r_eng f1_r_math f1_r_prsc f1_r_ss f1_r_art f1_r_total {
	bysort y1_standard: egen `v'_mean=mean(`v') if f1_e_p==1
	bysort y1_standard: egen `v'_sd=sd(`v') 	if f1_e_p==1
	bysort y1_standard:  gen `v'_z=(`v'-`v'_mean)/`v'_sd if f1_e_p==1
	}
	* Note: the z-scored raw total is renamed f1_total; f1_z_total is a different variable created below
	rename (f1_r_chi_z f1_r_eng_z f1_r_math_z f1_r_prsc_z f1_r_ss_z f1_r_art_z f1_r_total_z) (f1_z_chi f1_z_eng f1_z_math f1_z_prsc f1_z_ss f1_z_art f1_total)
	* Step 2: average of the six subject z-scores
	egen f1_z_avg = rowmean(f1_z_chi f1_z_eng f1_z_math f1_z_prsc f1_z_ss f1_z_art)
	
	* Step 3: re-standardize the average using the mean and sd of control students in the
	* y1_sub sample within each standard; max() copies those statistics to all rows
	bysort y1_standard: egen f1_z_avg_mean=mean(f1_z_avg) if y1_sub==1&control==1
	bysort y1_standard: egen f1_z_avg_sd=sd(f1_z_avg)	  if y1_sub==1&control==1
	egen f1_z_avg_mean1 = max(f1_z_avg_mean), by(y1_standard) 
	egen f1_z_avg_sd1 = max(f1_z_avg_sd), by(y1_standard) 
	bysort y1_standard:  gen f1_z_total=(f1_z_avg-f1_z_avg_mean1)/f1_z_avg_sd1 if f1_e_p==1

	* Remove the helper statistics. NOTE(review): the wildcards drop every variable whose
	* name ends in _mean, _sd, mean1 or sd1, not only the ones created here -- confirm
	drop *_mean *_sd *mean1 *sd1

****************************************************
/* 		2.2 Rank		 */
****************************************************
 /* 2.2.1. Year1 Baseline exam
	bysort y1_standard: egen b1_rank = rank(b1_z_total), field
	bysort y1_standard: egen b1_nstd = count(b1_e_p) if y1_sub==1 
	bysort y1_standard: gen b1_pct = (1-b1_rank/b1_nstd)*100

 * 2.2.2. Year1 Mid-term exam
	bysort y1_standard: egen m1_rank2 = rank(m1_z_total2), field
	bysort y1_standard: egen m1_nstd2 = count(m1_e_p) if m1_e_p==1
	bysort y1_standard: gen m1_pct2 = (1-m1_rank2/m1_nstd2)*100
*/
 * 2.2.3. Year1 Final exam
	* Field ranking within standard (the highest f1_z_total gets rank 1)
	bysort y1_standard : egen f1_rank = rank(f1_z_total), field
	* Number of final exam participants in the standard (defined for participants only)
	bysort y1_standard : egen f1_nstd = count(f1_e_p) if f1_e_p==1
	* Percentile position: (1 - rank/participants) * 100
	bysort y1_standard : gen f1_pct  = (1-f1_rank/f1_nstd)*100



****************************************************
/* 		2.3 Subgroup		 *
****************************************************
 * 2.3.1. Year1 
gen y1_subgroup = .
	forval i=1/30 {
	local v = `i'*100
	local u = (`i'-1)*100
	bysort y1_standard: replace y1_subgroup = `i'  if b1_rank>`u'&b1_rank<=`v'
	}
	
****************************************************
/* 		2.4 Subgroup rank		 */
****************************************************
rename b1_subpct b1_subpct_org

 * 2.4.1 Year1 Baseline exam
	bysort y1_standard y1_bin : egen b1_subrank = rank(b1_z_total), field
	bysort y1_standard y1_bin : egen b1_nbin    = count(b1_e_p) if y1_sub==1 
	bysort y1_standard y1_bin : gen b1_subpct   = (1-b1_subrank/b1_nbin)*100

 * 2.4.1 Year1 Mid-term exam
	bysort y1_standard y1_bin : egen m1_subrank2 = rank(m1_z_total2), field
	bysort y1_standard y1_bin : egen m1_nbin2    = count(m1_e_p) if m1_e_p==1
	bysort y1_standard y1_bin : gen m1_subpct2   = (1-m1_subrank2/m1_nbin2)*100
*/	
 * 2.4.2 Year1 Final exam 
 * NOTE(review): the block comment opened at the 2.3 header is not closed on that line, so
 * sections 2.3 to 2.4.1 above appear to be inactive -- confirm this is intended.
	* Field ranking of f1_z_total within standard x y1_bin cells (highest score gets rank 1)
	bysort y1_standard y1_bin : egen f1_subrank = rank(f1_z_total), field
	* Number of final exam participants in the cell
	bysort y1_standard y1_bin : egen f1_nbin    = count(f1_e_p) if f1_e_p==1
	* Percentile position inside the cell: (1 - rank/participants) * 100
	bysort y1_standard y1_bin :  gen f1_subpct  = (1-f1_subrank/f1_nbin)*100
	
	* Drops every variable whose name ends in _nbin
	drop *_nbin
	
	
********************************************************************************************************************
/*
  					3. Intermediate outcomes
*/
********************************************************************************************************************

****************************************************
/* 		3.1 Understanding and expectation of scholarship		 */
****************************************************
 * 3.1.1 Before the randomization result announcement
	* 1 if y1_quiz_note2 equals 5, 0 for other values, missing when the item is missing
	gen notequiz_dum = (y1_quiz_note2 == 5) if y1_quiz_note2 != .

	* Cutoff dummies for answers strictly between 0 and 6:
	* 1 when the answer is at or below the cutoff, 0 when it is above
	forvalues cut = 1/4 {
		gen expect_scholarship`cut' = (y1_quiz_note2 <= `cut') if y1_quiz_note2 > 0 & y1_quiz_note2 < 6
	}

	* Share out of five: quiz items 1-4 plus notequiz_dum (rowtotal counts missing items as zero)
	egen quiztotal = rowtotal(y1_quiz1-y1_quiz4 notequiz_dum)
	replace quiztotal = quiztotal/5
	order y1_quiz5, a(y1_quiz4)

	* Share out of two for the item pairs (1, 3) and (2, 4)
	egen b1_merit_understand = rowtotal(y1_quiz1 y1_quiz3)
	replace b1_merit_understand = b1_merit_understand/2

	egen b1_relative_understand = rowtotal(y1_quiz2 y1_quiz4)
	replace b1_relative_understand = b1_relative_understand/2

	* Mean over the non-missing quiz items 1-5
	egen understand_scholarship = rowmean(y1_quiz1-y1_quiz5)
	
 * 3.1.2 After the randomization result announcement (Followup survey)
	* f1_q501_dum: 1 if the answer to f1_q501 equals the assigned arm code y1_rand, 0 if it differs.
	* A missing answer must stay missing: in Stata a missing value compares as unequal to any
	* arm code (and as equal to another missing), so without the guard non-respondents would be
	* coded 0 (or 1 when y1_rand is also missing). This matches the q504-q508 dummies below.
	gen f1_q501_dum=.
	replace f1_q501_dum=1 if f1_q501==y1_rand & !missing(f1_q501)
	replace f1_q501_dum=0 if f1_q501~=y1_rand & !missing(f1_q501)
	
	* Each dummy below is 1 for one keyed answer, 0 for the other listed answer codes,
	* and missing for anything else (including item non-response)
	gen f1_q504_dum=.
	replace f1_q504_dum=1 if f1_q504==1
	replace f1_q504_dum=0 if f1_q504==0|f1_q504==2

	gen f1_q505_dum=.
	replace f1_q505_dum=1 if f1_q505==1
	replace f1_q505_dum=0 if f1_q505==0|f1_q505==2
  
	gen f1_q506_dum=.
	replace f1_q506_dum=1 if f1_q506==2
	replace f1_q506_dum=0 if f1_q506==1|f1_q506==3

	gen f1_q507_dum=.
	replace f1_q507_dum=1 if f1_q507==13
	replace f1_q507_dum=0 if f1_q507<13&f1_q507>0|f1_q507>13&f1_q507<25

	gen f1_q508_dum=.
	replace f1_q508_dum=1 if f1_q508==2
	replace f1_q508_dum=0 if f1_q508==1|f1_q508>2&f1_q508<5

	* f1_q509 with code 99 set to missing and the scale reversed (5 minus the answer)
	gen f1_q509_ans=f1_q509
	replace f1_q509_ans=. if f1_q509==99
	replace f1_q509_ans=5-f1_q509_ans
	
	* Row means over the non-missing dummies
	egen f1_merit_understand = rowmean(f1_q504_dum f1_q506_dum)
	egen f1_relative_understand = rowmean(f1_q505_dum f1_q507_dum)
	
	* The range relies on creation order: q504 to q508 dummies are stored next to each other
	egen f1_understand_scholarship=rowmean(f1_q504_dum-f1_q508_dum)
	
	* Cutoff dummies on the original f1_q509 answer: 1 if in (0, i], 0 if in (i, 6)
	forvalue i=1/4{
	gen f1_expect_scholarship`i'=.
	replace f1_expect_scholarship`i'=0 if f1_q509>`i'&f1_q509<6
	replace f1_expect_scholarship`i'=1 if f1_q509>0&f1_q509<=`i'
	}

	* Place the quiz and understanding variables right after f1_q509
	order y1_quiz1-y1_quiz4 y1_quiz_note1 y1_quiz_note2 notequiz_dum expect_scholarship* f1_q501_dum-f1_q509_ans f1_expect_scholarship* understand_scholarship f1_understand_scholarship, a(f1_q509)
	
****************************************************
/* 		3.2 Student investment		 */
****************************************************
 * 3.2.1 Attendance
 * For every monthly variable from y1_nov14 to y1_jun15 three recodes are created:
 *   _abs      : 1 if the code is 1, 0 if the code is 0, missing otherwise
 *   _abs_drop : 1 if the code is 1, 0 if the code is 0 or 2, missing otherwise
 *   _dum      : 1 if the code is 1, 0 otherwise (missing codes also become 0)
 * The meaning of the raw codes 0, 1 and 2 is not visible in this file -- TODO confirm.
	foreach v of varlist y1_nov14-y1_jun15{
	label variable `v' "`v'"
	gen `v'_abs=.
	replace `v'_abs=1 if `v'==1
	replace `v'_abs=0 if `v'==0

	gen `v'_abs_drop=.
	replace `v'_abs_drop=1 if `v'==1
	replace `v'_abs_drop=0 if `v'==0|`v'==2

	gen `v'_dum= (`v'==1)
	}
	
	* Row totals over all eight months, the first four months (suffix _ba) and the last
	* four months (suffix _aa0 or _aa); rowtotal counts missing months as zero
	egen atd_abs     = rowtotal(y1_nov14_abs y1_dec14_abs y1_jan15_abs y1_feb15_abs y1_mar15_abs y1_apr15_abs y1_may15_abs y1_jun15_abs)
	egen atd_abs_ba  = rowtotal(y1_nov14_abs y1_dec14_abs y1_jan15_abs y1_feb15_abs)
	egen atd_abs_aa0 = rowtotal(y1_mar15_abs y1_apr15_abs y1_may15_abs y1_jun15_abs)

	egen atd_abs_drop     = rowtotal(y1_nov14_abs_drop y1_dec14_abs_drop y1_jan15_abs_drop y1_feb15_abs_drop y1_mar15_abs_drop y1_apr15_abs_drop y1_may15_abs_drop y1_jun15_abs_drop)
	egen atd_abs_drop_ba  = rowtotal(y1_nov14_abs_drop y1_dec14_abs_drop y1_jan15_abs_drop y1_feb15_abs_drop)
	egen atd_abs_drop_aa0 = rowtotal(y1_mar15_abs_drop y1_apr15_abs_drop y1_may15_abs_drop y1_jun15_abs_drop)
	
	egen atd    = rowtotal(y1_nov14_dum y1_dec14_dum y1_jan15_dum y1_feb15_dum y1_mar15_dum y1_apr15_dum y1_may15_dum y1_jun15_dum)
	egen atd_ba = rowtotal(y1_nov14_dum y1_dec14_dum y1_jan15_dum y1_feb15_dum)
	egen atd_aa = rowtotal(y1_mar15_dum y1_apr15_dum y1_may15_dum y1_jun15_dum)
	
	* Monthly averages (suffix m): totals divided by the number of months covered
	foreach v of varlist atd_abs atd_abs_drop atd{
	gen `v'm=`v'/8
	}
	foreach v of varlist atd_abs_ba atd_abs_drop_ba atd_ba {
	gen `v'm=`v'/4
	}
	order y1_nov14_abs-y1_jun15_dum, a(y1_jun15)
	
	* Last-four-month averages divide by 4 for standards 5 to 7 and by 2 for standard 8
	* (the reason for the shorter window in standard 8 is not visible here -- TODO confirm)
	foreach v of varlist atd_abs_aa0 atd_abs_drop_aa0 atd_aa{
	gen `v'm=`v'/4     if y1_standard>4&y1_standard<8
	replace `v'm=`v'/2 if y1_standard==8
	}
	
	order y1_nov14_abs-atd_aam, a(y1_jun15)
 
 * 3.2.2 Study hours
	* Map answer categories to numeric values; the recoded copies get the prefix h
	* (presumably days for q214/q218/q221 and hours per day for q215/q219/q222 -- TODO confirm units)
	recode f1_q214 f1_q218 f1_q221 (1=0) (2=0.5) (3=1.5) (4=4) (5=10) (6=20), pre(h)
	recode f1_q215 f1_q219 f1_q222 (1=0.25) (2=0.75) (3=1.5) (4=2.5) (5=3.5), pre(h)

   * Two cutoff dummies per item, with cutoff j = i + 4 (5 for suffix 1, 6 for suffix 2):
   * 0 if the answer is in (0, j), 1 if it is in [j, 7), missing otherwise
   foreach v of varlist f1_q214 f1_q218 f1_q221{
   forvalue i=1/2{
   local j=`i'+4
   gen `v'`i'=.
   replace `v'`i'=0 if `v'>0&`v'<`j'
   replace `v'`i'=1 if `v'>=`j'&`v'<7
   }
   }
	
	* Products of the paired recoded items, then their row total (missing products count as zero)
	gen f1_studyhour_ac=hf1_q214*hf1_q215
	gen f1_studyhour_bc=hf1_q218*hf1_q219
	gen f1_studyhour_we=hf1_q221*hf1_q222
	egen f1_studyhour_t=rowtotal(f1_studyhour_ac f1_studyhour_bc f1_studyhour_we)
	* The total is missing only when all six recoded items are missing
	replace f1_studyhour_t=. if hf1_q214==.&hf1_q215==.&hf1_q218==.&hf1_q219==.&hf1_q221==.&hf1_q222==.

	* NOTE(review): the range starts at f1_q2142, so f1_q2141 (created just before it) is not moved -- confirm
	order f1_q2142-f1_q2212 hf1* f1_studyhour*, a(f1_q222)
	
 * 3.2.3 Homework completion
	* For each cutoff 2 to 5: 1 if f1_q217 is in (0, cutoff], 0 if it is above the
	* cutoff but below 6, missing otherwise
	forvalues cut = 2/5 {
		gen f1_q217`cut' = (f1_q217 <= `cut') if f1_q217 > 0 & f1_q217 < 6
	}
	order f1_q2172-f1_q2175, a(f1_q217)
 
****************************************************
/* 		3.3 Cognitive abilities		 */
****************************************************
 * 3.3.1 Raven test
 * One dummy per item f1_q801 to f1_q810: 1 when the answer equals the keyed option for that
 * item, 0 for the other answer codes listed, missing otherwise.
	foreach v of varlist f1_q801-f1_q810{
	gen `v'_dum=.
	}
	replace f1_q801_dum=1 if f1_q801==2
	replace f1_q801_dum=0 if f1_q801>2&f1_q801<7|f1_q801==1

	replace f1_q802_dum=1 if f1_q802==1
	replace f1_q802_dum=0 if f1_q802>1&f1_q802<7

	replace f1_q803_dum=1 if f1_q803==1
	replace f1_q803_dum=0 if f1_q803>1&f1_q803<7

	replace f1_q804_dum=1 if f1_q804==5
	replace f1_q804_dum=0 if f1_q804>0&f1_q804<5|f1_q804==6

	replace f1_q805_dum=1 if f1_q805==4
	replace f1_q805_dum=0 if f1_q805>0&f1_q805<4|f1_q805>4&f1_q805<7

	replace f1_q806_dum=1 if f1_q806==8
	replace f1_q806_dum=0 if f1_q806>0&f1_q806<8

	replace f1_q807_dum=1 if f1_q807==3
	replace f1_q807_dum=0 if f1_q807>3&f1_q807<9|f1_q807>0&f1_q807<3

	replace f1_q808_dum=1 if f1_q808==7
	replace f1_q808_dum=0 if f1_q808>0&f1_q808<7|f1_q808==8

	replace f1_q809_dum=1 if f1_q809==5
	replace f1_q809_dum=0 if f1_q809>5&f1_q809<9|f1_q809>0&f1_q809<5

	replace f1_q810_dum=1 if f1_q810==1
	replace f1_q810_dum=0 if f1_q810>1&f1_q810<9

	* Score = mean of the dummies for items 801 to 808 only.
	* NOTE(review): items 809 and 810 are coded above but not part of this score -- confirm
	egen f1_s8_score1=rowmean(f1_q801_dum-f1_q808_dum)
	* For students with y1_sub==1 and y1_fuq==1, a missing score and missing item dummies are set to 0.
	* The score is computed before the dummies are filled, so it averages answered items only.
	replace f1_s8_score1=0 if y1_sub==1&y1_fuq==1&f1_s8_score1==.
	foreach v of varlist f1_q801-f1_q810{
	replace `v'_dum=0 if y1_sub==1&y1_fuq==1&`v'_dum==.
	}
	order f1_q801_dum-f1_q810_dum, a(f1_q810)
	order f1_s8_score1, b(f1_q801)

  * 3.3.2 Computation score
  * Items are grouped by the answer option treated as correct (1 to 5). Each dummy is 1 for
  * that option, 0 for the other answer codes listed, and missing otherwise.
	* Keyed option 1
	foreach v of varlist f1_q902 f1_q910 f1_q914 f1_q917{
	gen `v'_dum=.
	replace `v'_dum=1 if `v'==1
	replace `v'_dum=0 if `v'>1&`v'<6
	}
	
	* Keyed option 2
	foreach v of varlist f1_q905 f1_q906 f1_q907 f1_q909 f1_q919{
	gen `v'_dum=.
	replace `v'_dum=1 if `v'==2
	replace `v'_dum=0 if `v'>2&`v'<6|`v'==1
	}
	
	* Keyed option 3
	foreach v of varlist f1_q901 f1_q904 f1_q908 f1_q911 f1_q913{
	gen `v'_dum=.
	replace `v'_dum=1 if `v'==3
	replace `v'_dum=0 if `v'>3&`v'<6|`v'<3&`v'>0
	}
	
	* Keyed option 4
	foreach v of varlist f1_q903 f1_q916{
	gen `v'_dum=.
	replace `v'_dum=1 if `v'==4
	replace `v'_dum=0 if `v'>4&`v'<6|`v'<4&`v'>0
	}	
	
	* Keyed option 5
	foreach v of varlist f1_q912 f1_q915 f1_q918 f1_q920{ 
	gen `v'_dum=.
	replace `v'_dum=1 if `v'==5
	replace `v'_dum=0 if `v'<5&`v'>0
	}	 
	
	* Score = mean over all 20 item dummies. The dummies are created group by group, not in
	* item order, so the range f1_q901_dum-f1_q920_dum would only span the 11 variables stored
	* between those two names and silently leave out 9 items. The items are listed explicitly.
	egen f1_s9_score=rowmean(f1_q901_dum f1_q902_dum f1_q903_dum f1_q904_dum f1_q905_dum f1_q906_dum f1_q907_dum f1_q908_dum f1_q909_dum f1_q910_dum f1_q911_dum f1_q912_dum f1_q913_dum f1_q914_dum f1_q915_dum f1_q916_dum f1_q917_dum f1_q918_dum f1_q919_dum f1_q920_dum)
	* For students with y1_sub==1 and y1_fuq==1, a missing score and missing item dummies are set to 0
	replace f1_s9_score=0 if y1_sub==1&y1_fuq==1&f1_s9_score==.
	foreach v of varlist f1_q901-f1_q920{
	replace `v'_dum=0 if y1_sub==1&y1_fuq==1&`v'_dum==.
	}
	order f1_s9_score, a(f1_s8_score1)

****************************************************
/* 		3.4 Non-cognitive abilities		 */
****************************************************
    * Reverse-code the listed items into new variables with the suffix _rev:
    * the first group on a 1-4 scale, the second on a 1-5 scale (3 stays unchanged)
	local flip4 (1=4) (2=3) (3=2) (4=1)
	local flip5 (1=5) (2=4) (4=2) (5=1)

	foreach item of varlist f1_q702 f1_q705 f1_q706 f1_q708 f1_q709 {
		recode `item' `flip4', gen(`item'_rev)
	}
	foreach item of varlist f1_q711 f1_q713 f1_q715 f1_q716 f1_q720 f1_q722 f1_q723 f1_q727 {
		recode `item' `flip5', gen(`item'_rev)
	}

	* Scale scores: row means over the non-missing items, using the reversed versions where available
	egen f1_s7_selfesteem    = rowmean(f1_q701 f1_q702_rev f1_q703 f1_q704 f1_q705_rev f1_q706_rev f1_q707 f1_q708_rev f1_q709_rev f1_q710)
	egen f1_s7_grit          = rowmean(f1_q711_rev f1_q712 f1_q713_rev f1_q714 f1_q715_rev f1_q716_rev f1_q717 f1_q718)
	egen f1_s7_conscientious = rowmean(f1_q719 f1_q720_rev f1_q721 f1_q722_rev f1_q723_rev f1_q724 f1_q725 f1_q726 f1_q727_rev)

****************************************************
/* 		3.5 Student motivation		 */
****************************************************
	* Sum and mean of items f1_q210 to f1_q212, for students with y1_fuq==1 only
	egen f1_q210212   = rowtotal(f1_q210-f1_q212) if y1_fuq == 1
	egen f1_q210212_m = rowmean(f1_q210-f1_q212)  if y1_fuq == 1
	order f1_q210212 f1_q210212_m, a(f1_q212)

	* 1 if f1_q211 is at least 4 and below 6, else 0; missing when y1_fuq is not 1
	* or the item itself is missing
	gen f1_q2114 = (f1_q211 >= 4 & f1_q211 < 6) if y1_fuq == 1 & f1_q211 != .

	* Plain copy of f1_q211 under a descriptive name
	gen f1_motivation = f1_q211
	
****************************************************
/* 		3.6 Teacher and Parental response		 */
****************************************************
 * 3.6.1 Teacher response
	* Reverse-code f1_q606 on a 1-5 scale (3 stays unchanged)
	recode f1_q606 (1=5) (2=4) (4=2) (5=1), gen(f1_q606_rev)
	
	* Seven sub-scales, each the row mean of three consecutive items (q606 enters reversed)
	egen f1_s6_care=rowmean(f1_q601 f1_q602 f1_q603)
	egen f1_s6_challenge=rowmean(f1_q604 f1_q605 f1_q606_rev)
	egen f1_s6_control=rowmean(f1_q607 f1_q608 f1_q609)
	egen f1_s6_clarify=rowmean(f1_q610 f1_q611 f1_q612)
	egen f1_s6_captivate=rowmean(f1_q613 f1_q614 f1_q615)
	egen f1_s6_confer=rowmean(f1_q616 f1_q617 f1_q618)
	egen f1_s6_consolidate=rowmean(f1_q619 f1_q620 f1_q621)
	order f1_s6_care-f1_s6_consolidate, a(f1_q509_ans)
	
	* Simple average of the seven sub-scales
	egen f1_teacherindex=rowmean(f1_s6_care-f1_s6_consolidate)
	* genindex is not defined in this file (presumably it comes from programs.do);
	* what it computes from these inputs is not visible here -- TODO confirm
	genindex f1_s6_care f1_s6_challenge f1_s6_control f1_s6_clarify f1_s6_captivate f1_s6_confer f1_s6_consolidate, prefix(f1_teachereffort) label("Teacher effort Index")

 * 3.6.2 Parental effort
	* Code 6 is set to missing in place for items f1_q110c to f1_q110l
	* (presumably a not-applicable or do-not-know code -- TODO confirm)
	foreach v of varlist f1_q110c-f1_q110l {
	replace `v' = . if `v'==6
	}
	
	egen f1_parentaleffort_avg=rowmean(f1_q110c-f1_q110l)
	genindex f1_q110c f1_q110d f1_q110e f1_q110f f1_q110g f1_q110h f1_q110i f1_q110j f1_q110k f1_q110l, prefix(f1_parentaleffort) label("Parental effort Index")
   
   * family involvement in study
   egen f1_parentalinvolve=rowmean(f1_q107 f1_q108 f1_q111)
 * 3.6.3 Parents' mentioning the scholarship to the students (f1_q503)
 
 
********************************************************************************************************************
/*
  					4. Intermediate outcome variables baseline
*/
******************************************************************************************************************** 
****************************************************
/* 		4.1 Student investment		 */
****************************************************
 * 4.1.1 Study hours
	* Map the answer categories of b1_q217 and b1_q218 to numeric values (copies prefixed h)
	recode b1_q217 (1=0) (2=0.5) (3=1.5) (4=4) (5=10) (6=20), prefix(h)
	recode b1_q218 (1=0.25) (2=0.75) (3=1.5) (4=2.5) (5=3.5), prefix(h)

	* Study hours = product of the two recoded items, missing when either one is missing
	gen b1_studyhour = hb1_q217*hb1_q218 if hb1_q217 != . & hb1_q218 != .
	gen b1_studyhour1 = b1_studyhour

 * 4.1.2 Homework completion
	* 1 if b1_q220 is 1 or 2, else 0 (a missing answer is also coded 0)
	gen b1_q2202 = inlist(b1_q220, 1, 2)
	order hb1_q217 hb1_q218 b1_studyhour, a(b1_q218)
	order b1_q2202, a(b1_q220)
 
 
****************************************************
/* 		4.2 Cognitive abilities		 */
****************************************************
 * 4.2.1 Raven test
 * One dummy per item b1_q1001 to b1_q1010: 1 when the answer equals the keyed option for
 * that item, 0 for the other answer codes listed, missing otherwise.
	foreach v of varlist b1_q1001-b1_q1010{
	gen `v'_dum=.
	}
	replace b1_q1001_dum=1 if b1_q1001==1
	replace b1_q1001_dum=0 if b1_q1001>1&b1_q1001<7

	replace b1_q1002_dum=1 if b1_q1002==2
	replace b1_q1002_dum=0 if b1_q1002>2&b1_q1002<7|b1_q1002==1

	replace b1_q1003_dum=1 if b1_q1003==2
	replace b1_q1003_dum=0 if b1_q1003>2&b1_q1003<7|b1_q1003==1

	replace b1_q1004_dum=1 if b1_q1004==6
	replace b1_q1004_dum=0 if b1_q1004>0&b1_q1004<6

	replace b1_q1005_dum=1 if b1_q1005==2
	replace b1_q1005_dum=0 if b1_q1005>2&b1_q1005<9|b1_q1005==1

	replace b1_q1006_dum=1 if b1_q1006==7
	replace b1_q1006_dum=0 if b1_q1006>0&b1_q1006<7|b1_q1006==8

	* NOTE(review): unlike the other items, the lower branch here has no "greater than 0"
	* bound, so zero or negative codes would also be scored 0 -- confirm this is intended
	replace b1_q1007_dum=1 if b1_q1007==3
	replace b1_q1007_dum=0 if b1_q1007>3&b1_q1007<9|b1_q1007<3

	replace b1_q1008_dum=1 if b1_q1008==4
	replace b1_q1008_dum=0 if b1_q1008>4&b1_q1008<9
	replace b1_q1008_dum=0 if b1_q1008>0&b1_q1008<4

	replace b1_q1009_dum=1 if b1_q1009==5
	replace b1_q1009_dum=0 if b1_q1009>5&b1_q1009<9
	replace b1_q1009_dum=0 if b1_q1009>0&b1_q1009<5

	replace b1_q1010_dum=1 if b1_q1010==1
	replace b1_q1010_dum=0 if b1_q1010>1&b1_q1010<9

	* score1 averages items 1, 2, 3 and 5; score2 averages items 1 to 8
	* (the reason for these subsets is not visible here -- TODO confirm)
	egen b1_s10_score1=rowmean(b1_q1001_dum b1_q1002_dum b1_q1003_dum b1_q1005_dum)
	egen b1_s10_score2=rowmean(b1_q1001_dum-b1_q1008_dum)

	order b1_q1001_dum-b1_q1010_dum, a(b1_q1010)
	order b1_s10_score1 b1_s10_score2, b(b1_q1001)

****************************************************
/* 		4.3 Non-cognitive abilities		 */
****************************************************
    * Reverse-code the listed items into new variables with the suffix _rev:
    * the first group on a 1-4 scale, the second on a 1-5 scale (3 stays unchanged)
	local flip4 (1=4) (2=3) (3=2) (4=1)
	local flip5 (1=5) (2=4) (4=2) (5=1)

	foreach item of varlist b1_q902 b1_q905 b1_q906 b1_q908 b1_q909 {
		recode `item' `flip4', gen(`item'_rev)
	}
	foreach item of varlist b1_q911 b1_q913 b1_q915 b1_q916 b1_q920 b1_q922 b1_q923 b1_q927 {
		recode `item' `flip5', gen(`item'_rev)
	}

	* Scale scores: row means over the non-missing items, using the reversed versions where available
	egen b1_s9_selfesteem    = rowmean(b1_q901 b1_q902_rev b1_q903 b1_q904 b1_q905_rev b1_q906_rev b1_q907 b1_q908_rev b1_q909_rev b1_q910)
	egen b1_s9_grit          = rowmean(b1_q911_rev b1_q912 b1_q913_rev b1_q914 b1_q915_rev b1_q916_rev b1_q917 b1_q918)
	egen b1_s9_conscientious = rowmean(b1_q919 b1_q920_rev b1_q921 b1_q922_rev b1_q923_rev b1_q924 b1_q925 b1_q926 b1_q927_rev)

	* Reversed items go after the last raw item, scale scores before the first one
	order b1_q902_rev-b1_q927_rev, a(b1_q927)
	order b1_s9_*, b(b1_q901)

****************************************************
/* 		4.4 Student motivation		 */
****************************************************
	* Sum and mean of items b1_q213 to b1_q215
	egen b1_q213215   = rowtotal(b1_q213-b1_q215)
	egen b1_q213215_m = rowmean(b1_q213 b1_q214 b1_q215)
	order b1_q213215 b1_q213215_m, a(b1_q215)

	* 1 if b1_q214 is in [4, 6), 0 if it is in (0, 4), missing otherwise
	gen b1_q2144 = (b1_q214 >= 4) if b1_q214 > 0 & b1_q214 < 6
	* Plain copy of b1_q214 under a descriptive name
	gen b1_motivation = b1_q214

	* Section 7 scale scores: row means over the non-missing items of each scale
	egen b1_s7_selfefficacy         = rowmean(b1_q706 b1_q710 b1_q712 b1_q713 b1_q715 b1_q717 b1_q720 b1_q722 b1_q723)
	egen b1_s7_intrinsicvalue       = rowmean(b1_q705 b1_q708 b1_q709 b1_q711 b1_q714 b1_q718 b1_q719 b1_q721 b1_q725)
	egen b1_s7_testanxiety          = rowmean(b1_q707 b1_q716 b1_q724 b1_q726)
	egen b1_s7_cognitivestrategyuse = rowmean(b1_q727 b1_q728 b1_q730 b1_q732 b1_q733 b1_q734 b1_q735 b1_q738 b1_q740 b1_q743 b1_q745 b1_q746 b1_q748)
	egen b1_s7_selfregulation       = rowmean(b1_q729 b1_q731 b1_q736 b1_q737 b1_q739 b1_q741 b1_q742 b1_q744 b1_q747)
	order b1_s7_*, a(b1_q704)
	
****************************************************
/* 		4.5 Teacher and Parental response		 */
****************************************************
 * 4.5.1 Teacher response
	* Reverse-code b1_q806 on a 1-5 scale (3 stays unchanged)
	recode b1_q806 (1=5) (2=4) (4=2) (5=1), gen(b1_q806_rev)
	
	* Seven sub-scales, each the row mean of three consecutive items (q806 enters reversed)
	egen b1_s8_care=rowmean(b1_q801 b1_q802 b1_q803)
	egen b1_s8_challenge=rowmean(b1_q804 b1_q805 b1_q806_rev)
	egen b1_s8_control=rowmean(b1_q807 b1_q808 b1_q809)
	egen b1_s8_clarify=rowmean(b1_q810 b1_q811 b1_q812)
	egen b1_s8_captivate=rowmean(b1_q813 b1_q814 b1_q815)
	egen b1_s8_confer=rowmean(b1_q816 b1_q817 b1_q818)
	egen b1_s8_consolidate=rowmean(b1_q819 b1_q820 b1_q821)
	order b1_s8_*, a(b1_s7_selfregulation)
	
	* Simple average of the seven sub-scales
	egen b1_teacherindex=rowmean(b1_s8_care-b1_s8_consolidate)
	* genindex is not defined in this file (presumably it comes from programs.do);
	* what it computes from these inputs is not visible here -- TODO confirm
	genindex b1_s8_care b1_s8_challenge b1_s8_control b1_s8_clarify b1_s8_captivate b1_s8_confer b1_s8_consolidate, prefix(b1_teachereffort) label("Teacher effort Index")
	
 * 4.5.2 Parental effort
	* Copies of b1_q116c and b1_q116d that keep only values below 6 (other values become missing)
	gen b1_q116c_mi=b1_q116c if b1_q116c<6
	gen b1_q116d_mi=b1_q116d if b1_q116d<6
	*egen b1_parentaleffort=rowmean(b1_q116c_mi b1_q116d_mi)
	genindex b1_q116c_mi b1_q116d_mi, prefix(b1_parentaleffort) label("Parental effort Index")     

********************************************************************************************************************
/*
  					5. Demographics/Assets
*/
********************************************************************************************************************

****************************************************
/* 		5.1 Baseline		 */
****************************************************

 * 5.1.1 Demographics
	* Sex and age are read from roster line 3 (presumably the student's own line -- TODO confirm).
	* male is 1 when sex equals 1, 0 otherwise, and missing when sex is missing or coded 77.
	gen sex=b1_q101c3
	gen male = (sex==1) if sex~=. & sex~=77
	gen age=b1_q101d3
  
  * Household size
	* One 0/1 flag per roster line 1 to 14: 1 when column c of that line is filled in
  	forval i = 1/14 {
  		gen b1_hhmem_dum`i' = (b1_q101c`i'~=.)
  	}
	
	* Household size = number of filled-in roster lines.
	* The flags are 0/1 and never missing, so testing them for missing can never detect an
	* empty roster. A total of 0 means no roster line was filled in; it is set to missing
	* instead of being reported as a household of size zero.
	egen b1_hhsize=rowtotal(b1_hhmem_dum*) 
	replace b1_hhsize=. if b1_hhsize==0
	
  * Tribe dummy
	* 1 if b1_q105 equals 1, 0 for other answers, missing when the item is missing
	gen b1_q105_chewa=(b1_q105==1)
	replace b1_q105_chewa=. if b1_q105==.

  * Performance in class 
	* 0 if b1_q216g is in (0, 4), 1 if it is in [4, 6), missing otherwise
	gen b1_q216g4=.
	replace b1_q216g4=0 if b1_q216g>0&b1_q216g<4
	replace b1_q216g4=1 if b1_q216g>=4&b1_q216g<6
	
  * study with friends
	* Row mean over every variable whose name starts with b1_q601e
	egen b1_q601e=rowmean(b1_q601e*)
  
  * hang out with friends
	* Row mean over every variable whose name starts with b1_q601f
	egen b1_q601f=rowmean(b1_q601f*)  
  

 * 5.1.2 Assets
  * Durable good score
	* Row mean over items b1_q503b to b1_q503j (b1_q503a is outside this range but is part of the pca list below)
	egen b1_s5_durablegood=rowmean(b1_q503b-b1_q503j)
	* Principal component analysis over the section 5 items listed
	pca b1_q501 b1_q502 b1_q503a b1_q503b b1_q503c b1_q503d b1_q503e b1_q503f b1_q503g b1_q503h b1_q503i b1_q503j b1_q504 b1_q505a b1_q505b b1_q505c b1_q505d b1_q505e b1_q505f b1_q505g b1_q505h b1_q505i b1_q506a b1_q506b b1_q506c b1_q506d	
	* Stores the default prediction after pca (presumably the first-component score -- TODO confirm)
	predict b1_s5_asset

	* Move the derived variables next to the raw items they were built from
	order b1_hhsize, a(b1_q101h14)
	order b1_q105_chewa, a(b1_q105spe)	
	order b1_q601e b1_q601f, a(b1_q601f10)


********************************************************************************************************************
/*
  					6. Quantile and dummy variables by rank using exam score
*/
********************************************************************************************************************

****************************************************
/* 		6.1 Quantiles		 */
****************************************************
 * 6.1.1 Baseline exam
	* Quintiles of the standardized score and quartiles of the percentile rank
	xtile b1_quantile   = b1_z_total if b1_z_total != ., nquantiles(5)
	xtile b1_quantile_4 = b1_pct if b1_pct != ., nquantiles(4)

	* Three groups from the quartiles: bottom quartile = 1, middle two = 2, top quartile = 3
	gen b1_quantile_3 = cond(b1_quantile_4==1, 1, cond(inlist(b1_quantile_4,2,3), 2, cond(b1_quantile_4==4, 3, .)))

	* Deciles of the percentile rank
	xtile b1_decile = b1_pct if b1_pct != ., nquantiles(10)

 * 6.1.2 Mid-term exam
	xtile m1_quantile = m1_z_total if m1_z_total != ., nquantiles(5)

 * 6.1.3 Final exam
	xtile f1_quantile = f1_z_total if f1_z_total != ., nquantiles(5)
	

****************************************************
/* 		6.2 Dummy variables using percentage rank		 */
****************************************************
	
* 6.2.1 Baseline top 15% & bottom 85%
	* Defined for baseline exam participants only (missing for everyone else).
	* Note that b1_low15 flags a percentile below 85, i.e. the bottom 85%.
	gen b1_top15 = inrange(b1_pct, 85, 100)       if b1_e_p == 1
	gen b1_low15 = (b1_pct >= 0 & b1_pct < 85)    if b1_e_p == 1
	gen b1_low30 = (b1_pct >= 0 & b1_pct < 30)    if b1_e_p == 1
	gen b1_sub15 = inrange(b1_subpct, 85, 100)    if b1_e_p == 1
*	gen b1_sub15_org=(b1_subpct_org>=85&b1_subpct_org<=100) if b1_e_p==1

 * 6.2.2 Mid-term top 15% & bottom 85%
	* Participation is part of the condition here, so non-participants are coded 0 rather than missing
	gen m1_top15 = (m1_e_p == 1 & inrange(m1_pct, 85, 100))
	gen m1_low15 = (m1_e_p == 1 & m1_pct >= 0 & m1_pct < 85)
	gen m1_low30 = (m1_e_p == 1 & m1_pct >= 0 & m1_pct < 30)

 * 6.2.3 Mid-term subgroup top 15% dummy
	gen m1_sub15 = (m1_e_p == 1 & inrange(m1_subpct, 85, 100))
	gen m1_sub85 = (m1_e_p == 1 & m1_subpct >= 0 & m1_subpct < 85)

* 6.2.4 Final exam top 15% & bottom 85%
	* Defined for final exam participants only (missing for everyone else)
	gen f1_top15 = inrange(f1_pct, 85, 100)       if f1_e_p == 1
	gen f1_low15 = (f1_pct >= 0 & f1_pct < 85)    if f1_e_p == 1
	gen f1_sub15 = inrange(f1_subpct, 85, 100)    if f1_e_p == 1
	
****************************************************
/* 		6.3 Score change		 */
****************************************************
 * 6.3.1 Year1 
	* Change from baseline to mid-term.
	* NOTE(review): m1_z_total is taken as-is from the score_midline file while b1_z_total is
	* re-standardized in section 2.1 -- confirm both are on the same scale
	gen m1_b1=m1_z_total-b1_z_total	
	
	* Gaps between a survey answer (b1_q216g or f1_q213g) and the exam score quintile
	gen bs1_b1 = b1_q216g - b1_quantile
	gen bs1_m1 = b1_q216g - m1_quantile
	gen fs1_m1 = f1_q213g - m1_quantile
	gen fs1_f1 = f1_q213g - f1_quantile
	
	* Increase and decrease dummies within the y1_sub sample, using hard-coded cutoffs on the
	* score change (the origin of these cutoff values is not visible here -- TODO confirm)
	gen inc = (m1_b1>0.185&m1_b1<3.4)   if y1_sub==1
	gen dec = (m1_b1>-3.3&m1_b1<-0.15)  if y1_sub==1
	
	
********************************************************************************************************************
/*
  					7. Interactions for Heterogeneity Analysis
*/
********************************************************************************************************************

****************************************************
/* 		7.1 Interaction with top15% dummy		 */
****************************************************
 * 7.1.1 Baseline exam and 7.1.2 Mid-term exam, generated in a single pass.
 * For every rank dummy: m_ = merit x dummy and r_ = relative x dummy.
 * All except the baseline low15 and low30 dummies also get f_ = feedback x
 * dummy, mf_ = merit x f_ and rf_ = relative x f_.
 * The new variable name drops the underscore of the dummy name
 * (b1_top15 becomes the suffix b1top15).
	foreach grp in b1_top15 b1_low15 b1_low30 m1_top15 m1_low30 {
		local tag : subinstr local grp "_" ""

		generate m_`tag' = merit * `grp'
		generate r_`tag' = relative * `grp'

		if !inlist("`grp'", "b1_low15", "b1_low30") {
			generate f_`tag'  = feedback * `grp'
			generate mf_`tag' = merit * f_`tag'
			generate rf_`tag' = relative * f_`tag'
		}
	}
	

****************************************************
/* 		7.2 Interaction with Subgroup top15% dummy		 */
****************************************************
 * For each exam: m_ = merit x dummy, r_ = relative x dummy, f_ = feedback x
 * dummy, and the triple terms mf_ = merit x f_ and rf_ = relative x f_.

 * 7.2.1 Baseline exam
 	gen m_b1sub15 = merit*b1_sub15
	gen r_b1sub15 = relative*b1_sub15
	gen f_b1sub15 = feedback*b1_sub15
	gen mf_b1sub15 = merit*f_b1sub15
	gen rf_b1sub15 = relative*f_b1sub15
	
* 	gen m_b1sub15_org = merit*b1_sub15_org
*	gen r_b1sub15_org = relative*b1_sub15_org
*	gen f_b1sub15_org = feedback*b1_sub15_org
*	gen mf_b1sub15_org = merit*f_b1sub15_org
*	gen rf_b1sub15_org = relative*f_b1sub15_org
	
 * 7.2.2 Mid-term exam
	gen m_m1sub15 = merit*m1_sub15
	gen r_m1sub15 = relative*m1_sub15
	gen f_m1sub15 = feedback*m1_sub15
	* mf_ must be built from merit (as in the baseline block and as its label
	* "Feedback x Standard x Subgroup Top 15% (midyear)" says); using relative
	* here would make it an exact copy of rf_m1sub15.
	gen mf_m1sub15 = merit*f_m1sub15
	gen rf_m1sub15 = relative*f_m1sub15

*	gen m_m1sub15_org = merit*m1_sub15_org
*	gen r_m1sub15_org = relative*m1_sub15_org
*	gen f_m1sub15_org = feedback*m1_sub15_org
*	gen mf_m1sub15_org = merit*f_m1sub15_org
*	gen rf_m1sub15_org = relative*f_m1sub15_org

****************************************************
/* 		7.3 Interaction with exam rank		 */
****************************************************
 * Treatment arms interacted with the continuous percentage rank, first the
 * overall rank (_pct) and then the within-subgroup rank (_subpct).
 * The subgroup triple interactions are built from the subgroup two-way terms
 * (b1smerit, b1srelative, m1smerit, m1srelative); building them from the
 * overall-rank terms would only duplicate mf_*pct and rf_*pct.

 * 7.3.1 Baseline exam
	gen b1merit = merit*b1_pct
	gen b1relative = relative*b1_pct
	gen f_b1pct = feedback*b1_pct
	gen mf_b1pct = feedback*b1merit
	gen rf_b1pct = feedback*b1relative

	gen b1smerit = merit*b1_subpct
	gen b1srelative = relative*b1_subpct
	gen f_b1subpct = feedback*b1_subpct
	gen mf_b1subpct = feedback*b1smerit
	gen rf_b1subpct = feedback*b1srelative
	
 * 7.3.2 Mid-term exam
	gen m1merit = merit*m1_pct
	gen m1relative = relative*m1_pct
	gen f_m1pct = feedback*m1_pct
	gen mf_m1pct = feedback*m1merit
	gen rf_m1pct = feedback*m1relative
	
	gen m1smerit    = merit*m1_subpct
	gen m1srelative = relative*m1_subpct
	gen f_m1subpct  = feedback*m1_subpct
	gen mf_m1subpct = feedback*m1smerit
	gen rf_m1subpct = feedback*m1srelative

****************************************************
/* 		7.4 Interaction with improvement dummy		 */
****************************************************
 * Part 1: for the improvement (inc) and decline (dec) dummies, build
 *   m_, r_, f_            dummy x single arm
 *   mf_, rf_              dummy x arm x feedback
 *   change_               dummy x score change (m1_b1)
 *   mchange_ ... fchange_ dummy x score change x single arm
 *   mf_change_, rf_change_ dummy x score change x arm x feedback
 * The prefix letter is the first letter of the arm name.
	foreach d of varlist inc dec {
		foreach arm in merit relative feedback {
			local p = substr("`arm'", 1, 1)
			generate `p'_`d' = `arm' * `d'
		}
		foreach arm in merit relative {
			local p = substr("`arm'", 1, 1)
			generate `p'f_`d' = `arm' * feedback * `d'
		}

		generate change_`d' = m1_b1 * `d'
		foreach arm in merit relative feedback {
			local p = substr("`arm'", 1, 1)
			generate `p'change_`d' = `arm' * m1_b1 * `d'
		}
		foreach arm in merit relative {
			local p = substr("`arm'", 1, 1)
			generate `p'f_change_`d' = `arm' * feedback * m1_b1 * `d'
		}
	}

 * Part 2: the same single-arm and arm x feedback interactions for the
 * continuous change and gap measures (feedback first, to keep variable order).
	foreach s of varlist m1_b1 bs1_b1 fs1_m1 fs1_f1 {
		foreach arm in feedback merit relative {
			local p = substr("`arm'", 1, 1)
			generate `p'_`s' = `arm' * `s'
		}
		foreach arm in merit relative {
			local p = substr("`arm'", 1, 1)
			generate `p'f_`s' = `arm' * feedback * `s'
		}
	}
	
	
********************************************************************************************************************
/*
  					8. Scholarship awardee selection
*/
********************************************************************************************************************

****************************************************
/* 		8.1 Students who meet the selection criteria		 */
****************************************************
 * Both indicators are defined only for y1_sub==1 and are missing otherwise.
 * A missing percentage rank counts as not meeting the condition (0).

 * 8.1.1 Final-exam rank in the top 15% (merit based scholarship condition)
	generate mscondition = inrange(f1_pct, 85, 100) if y1_sub == 1

 * 8.1.2 Final-exam subgroup rank in the top 15% (relative merit based scholarship condition)
	generate rscondition = inrange(f1_subpct, 85, 100) if y1_sub == 1

****************************************************
/* 		8.2 Actual recipients		 */
****************************************************
 * 8.2.1 Year1
	* A student is a recipient when the condition matching the randomization
	* group holds: overall rank for y1_rand==1, subgroup rank for y1_rand==2.
	* NOTE(review): presumably 1 = merit arm and 2 = relative-merit arm -- confirm.
	generate y1_scholar = ((y1_rand == 1 & inrange(f1_pct, 85, 100)) | ///
	                       (y1_rand == 2 & inrange(f1_subpct, 85, 100))) if y1_sub == 1
	
	
********************************************************************************************************************
/*

* 											Year 2

*/
********************************************************************************************************************

********************************************************************************************************************
/*
  					9. Setting
*/
********************************************************************************************************************
****************************************************
/* 		9.1 Merge raw data of year1 and year2		 */
****************************************************
	* One-to-one merge on the student id; leaves the usual _merge variable behind
	merge 1:1 id using "${raw_path}/y2_raw.dta"

********************************************************************************************************************
/*
  					10. Create imputed version variable and dummy variables for the missing values (added Apr 2017)
*/
********************************************************************************************************************
	* For every covariate v:
	*   v_imp = v, with system-missing values (.) replaced by the placeholder 77
	*   v_dum = 1 where v was missing, 0 otherwise
	* The dummy is taken from the original variable rather than from v_imp==77,
	* so an observed value that happens to equal 77 is not flagged as missing.
	foreach v of varlist age male b1_q105_chewa b1_hhsize b1_s5_asset atd_abs_bam b1_studyhour b1_motivation b1_s9_selfesteem b1_s9_grit b1_s9_conscientious b1_teacherindex b1_parentaleffort atd_abs_aa0m f1_studyhour_ac f1_motivation f1_s7_selfesteem f1_s7_grit f1_s7_conscientious attd_stu s1_studyhour_ac s1_motivation s1_selfesteem s1_grit s1_conscientious {
	gen `v'_imp = `v'
	replace `v'_imp= 77 if `v'==.
	gen `v'_dum = (`v'==.) 
	}

* NOTE(review): the "9.1" header just below ends in " *" instead of a closing
* comment delimiter, so a block comment opens there. Stata block comments nest,
* so every balanced header further down is swallowed as well and the comment
* only ends at the lone closing delimiter that follows section 16. As written,
* none of the Year 2 commands in this region are executed. This looks
* deliberate (the Year 2 labels in section 17 are commented out too) -- TODO
* confirm before re-enabling anything here.
****************************************************
/* 		9.1 Merge raw data of year1 and year2		 *
****************************************************
	* Despite the header wording this is an append, not a merge; the force
	* option lets it proceed when a variable is string in one file and numeric
	* in the other.
	append using raw/tutor_raw, force

****************************************************
/* 		9.2 Project year variable		 */
****************************************************
	* year is 1 by default and 2 for observations with y2_register==1
	gen year=1
	replace year=2 if y2_register==1
	label var year "Project year"
	* These three names are created again further down (tutorstudent by the
	* rename in 10.2, tutorcog and tutornoncog in 12.1)
	drop tutorcog tutornoncog tutorstudent
	qui destring _all, replace

********************************************************************************************************************
/*
  					10. Generate various useful variables						
*/
********************************************************************************************************************

****************************************************
/* 		10.1 School-grade and Zones		 */
****************************************************
  * 10.1.1 Cluster
	* School number and standard concatenated into one numeric class id.
	* Presumably ".." is what the concatenation gives when both parts are
	* missing; it is blanked so that destring turns it into missing.
	egen y2_school_class=concat(y2_schoolno y2_standard)
	replace y2_school_class="" if y2_school_class==".."
	destring y2_school_class, replace

  * 10.1.2 Zone
	* Zone 1 = school numbers 11-21, zone 2 = 22-32, zone 3 = 33-41
    gen y2_zone=1 if y2_schoolno>10&y2_schoolno<22
	replace y2_zone=2 if y2_schoolno>21&y2_schoolno<33
	replace y2_zone=3 if y2_schoolno>32&y2_schoolno<42

****************************************************
/* 		10.2 Randomization group		 */
****************************************************
	* Numeric group code 1-4 from the string codes G1-G4
	gen y2_rand = 1 if y2_group=="G1"
	replace y2_rand = 2 if y2_group=="G2"
	replace y2_rand = 3 if y2_group=="G3"
	replace y2_rand = 4 if y2_group=="G4"

	rename y2_scholarship scholarschool
	rename y2_tutoring    tutorschool
	rename y2_tutoringstu tutorstudent
	* Six ids are hard-coded as not tutored; the reason is not recorded here
	replace tutorstudent = 0 if id==1351022|id==1351046|id==1351060|id==1561002|id==1608817|id==1620958
	
	* School-type dummies, defined for year 2 observations only:
	* group 1 = control, 2 = tutoring only, 3 = scholarship only, 4 = both
	gen y2_control  = (y2_rand==1)   if year==2
	gen tutoronlyschool   = (y2_rand==2)   if year==2
	gen scholaronlyschool = (y2_rand==3)   if year==2
	gen bothschool = (y2_rand==4)     if year==2


	gen notutorschool = (y2_rand==1|y2_rand==3) if year==2
	* Untutored students inside tutoring schools
	gen notutorstudent = (tutorstudent == 0 & tutorschool==1)    if year==2

****************************************************
/* 		10.3 Participation		 */
****************************************************
	replace tutor_jobsurvey = 0 if tutor_jobsurvey ==.
	
	* Participation flags (year 2 only): baseline survey, baseline exam and
	* final exam; an exam counts as taken when the raw math score is non-missing
	gen b2_s_p = (y2_baseline==1)  if year==2
	gen b2_e_p = (b2_r_math~=.)    if year==2
	gen f2_e_p = (f2_r_math~=.)    if year==2
	* y2_subject requires both baseline exam and baseline survey;
	* y2_subject2 requires the baseline exam only
	gen y2_subject = b2_e_p * b2_s_p
	gen y2_subject2 = b2_e_p
	
	
********************************************************************************************************************
/*
  					11. Exam result						
*/
********************************************************************************************************************

****************************************************
/* 		11.1 Standardized score		 */
****************************************************
  * 11.1.1 Score
	* Mean and sd are computed within standard over the reference group only
	* (notutorstudent==1 and y2_subject2==1), then spread to every observation
	* of the standard with max() before standardizing the raw score.
	gsort y2_standard id
	foreach v of varlist b2_r_math f2_r_math {
	bysort y2_standard: egen `v'_mean = mean(`v') if notutorstudent==1 &y2_subject2==1
	bysort y2_standard: egen `v'_sd = sd(`v') if notutorstudent==1 & y2_subject2==1

	egen `v'_mean1 = max(`v'_mean), by(y2_standard)
	egen `v'_sd1 = max(`v'_sd), by(y2_standard)
	bysort y2_standard: gen `v'_std = (`v'-`v'_mean1)/`v'_sd1 
	}
	rename (b2_r_math_std f2_r_math_std) (b2_z_math f2_z_math)
	
  * 11.1.2 Score change
	gen y2_scorechange = f2_z_math - b2_z_math

****************************************************
/* 		11.2 Rank		 */
****************************************************
	* Field ranking within standard (highest score gets rank 1). The
	* percentage rank is 100 times (1 minus rank over the number of exam
	* takers); the count is only filled for exam takers, so the percentage
	* rank is missing for everyone else.
 * 11.2.1 Year2 Baseline exam
 	bysort y2_standard : egen b2_rank_math = rank(b2_z_math), field
	bysort y2_standard : egen b2_nstd = count(b2_e_p) if b2_e_p==1
	bysort y2_standard :  gen b2_pct_math = (1-b2_rank_math/b2_nstd)*100
	
 * 11.2.2 Year2 Final exam	
	bysort y2_standard : egen f2_rank_math = rank(f2_z_math), field
	bysort y2_standard : egen f2_nstd = count(f2_e_p) if f2_e_p==1
	bysort y2_standard :  gen f2_pct_math = (1-f2_rank_math/f2_nstd)*100

drop *nstd

****************************************************
/* 		11.3 Subgroup		 */
****************************************************
	* Subgroup i holds baseline ranks above (i-1)*100 and up to i*100
	gen y2_subgroup = .
	forval i=1/30 {
	local v = `i'*100
	local u = (`i'-1)*100
	bysort y2_standard: replace y2_subgroup = `i'  if b2_rank_math>`u'&b2_rank_math<=`v'
	}

	* Hand adjustments keyed to specific ranks in standards 4 and 5. The
	* statements depend on their order. NOTE(review): the rationale is not
	* recorded here (possibly tied ranks at bin edges) -- confirm before editing.
	replace y2_subgroup=y2_subgroup-1 if y2_standard==4&y2_subgroup>9&y2_subgroup<28
	replace y2_subgroup=4  if y2_standard==4&b2_rank_math==425
	replace y2_subgroup=6  if y2_standard==4&b2_rank_math>=609&b2_rank_math<=625
	replace y2_subgroup=8  if y2_standard==4&b2_rank_math==817
	replace y2_subgroup=12 if y2_subgroup==13
	replace y2_subgroup=14 if y2_standard==4&b2_rank_math==1365
	replace y2_subgroup=15 if y2_standard==4&b2_rank_math>=1612&b2_rank_math<=1681
	replace y2_subgroup=18 if y2_standard==4&b2_rank_math>=1930&b2_rank_math<=1971
	replace y2_subgroup=21 if y2_standard==4&b2_rank_math>=2207&b2_rank_math<=2230
	replace y2_subgroup=23 if y2_standard==4&b2_rank_math>=2417&b2_rank_math<=2434
	replace y2_subgroup=25 if y2_standard==4&y2_subgroup==26

	replace y2_subgroup=y2_subgroup-1 if y2_standard==4&y2_subgroup>17&y2_subgroup<20
	replace y2_subgroup=y2_subgroup-2 if y2_standard==4&y2_subgroup>20&y2_subgroup<26


	replace y2_subgroup=21 if y2_standard==5&b2_rank_math==2118
	replace y2_subgroup=22 if y2_standard==5&b2_rank_math>=2213&b2_rank_math<=2244
	replace y2_subgroup=23 if y2_standard==5&y2_subgroup==24

	
****************************************************
/* 		11.4 Subgroup rank		 */
****************************************************
	* Same rank and percentage-rank construction as 11.2, within standard and subgroup
 * 11.4.1 Baseline exam 
	bysort y2_standard y2_subgroup : egen b2_subrank_math = rank(b2_z_math), field
	bysort y2_standard y2_subgroup : egen b2_nbin         = count(b2_e_p) if b2_e_p==1
	bysort y2_standard y2_subgroup :  gen b2_subpct_math  = (1-b2_subrank_math/b2_nbin)*100

 * 11.4.2 Final exam 
	bysort y2_standard y2_subgroup : egen f2_subrank_math = rank(f2_z_math), field
	bysort y2_standard y2_subgroup : egen f2_nbin         = count(f2_e_p) if f2_e_p==1
	bysort y2_standard y2_subgroup :  gen f2_subpct_math  = (1-f2_subrank_math/f2_nbin)*100

	drop *_nbin
	
	
****************************************************
/* 		11.5 Scholarship selection		 */
****************************************************
	* Award flag: final-exam subgroup percentage rank in [85, 100]; a missing
	* rank yields 0 for year 2 observations
	gen y2_scholar_award= (f2_subpct_math>=85&f2_subpct_math<=100)  if year==2


	
	
********************************************************************************************************************
/*
  					12. Tutor variables
*/
********************************************************************************************************************

**************************************************
/*  	12.1 Tutor dummy 		*/
**************************************************
	gen tutorcog = (y2_tutorcog==1)         if year==2
	gen tutornoncog = (y2_tutornoncog==1)   if year==2
	gen tutorday_mw = (y2_tutorday=="MON-WED")  if year==2
	gen tutorday_tt = (y2_tutorday=="TUE-THU")  if year==2
	* Blank the class size for the tutoring day the observation is not assigned to
	replace clsssize_mw = . if tutorday_mw==0
	replace clsssize_tt = . if tutorday_tt==0

	* Tutored-student status crossed with the high and low math and mock indicators
	gen tutor_highmath = math_high * tutorstudent
	gen tutor_lowmath  = math_low  * tutorstudent
	
	gen tutor_highmock = mock_high * tutorstudent
	gen tutor_lowmock  = mock_low  * tutorstudent
	
		
	* The same terms further crossed with the scholarship-school dummy
	gen scholar_tutor_highmath = scholarschool * tutor_highmath
	gen scholar_tutor_lowmath  = scholarschool * tutor_lowmath
	gen scholar_tutor_highmock = scholarschool * tutor_highmock
	gen scholar_tutor_lowmock  = scholarschool * tutor_lowmock
	
	gen scholar_tutorcog    = scholarschool * tutorcog
	gen scholar_tutornoncog = scholarschool * tutornoncog

	drop y2_tutorcog y2_tutornoncog y2_group
**************************************************
/*  	12.2 Number of students for each tutoring class */
**************************************************
	* Sum of tutorstudent within tutor id and tutoring class code
	bysort y2_tutorid y2_tutorclasscode : egen nstu_tutorclass = sum(tutorstudent)  if year==2
	

****************************************************
/* 		12.3 Interaction of tutored students and both group	(Year2)	 */
****************************************************
	gen tutorstu_bothschool  = tutorstudent*bothschool
	gen tutorstu_scholarship = tutorstudent*scholarschool
	gen notutorstu_scholarship = notutorstudent*scholarschool

********************************************************************************************************************
/*
  					13. Intermediate outcomes
*/
********************************************************************************************************************
**************************************************
/*  	13.1 Understanding and expectation of scholarship 		*/
**************************************************
	* Reverse the 1-4 scale of y2_quiz5; answer 5 is set to missing
	recode y2_quiz5 (1=4) (2=3) (3=2) (4=1), gen(y2_expectation)
	replace y2_expectation=. if y2_quiz5==5
	gen y2_understand4 = (y2_quiz1_4==4) if y2_quiz1_4~=.
	gen y2_expectation3 = (y2_expectation==3|y2_expectation==4) if y2_expectation~=.
	
	

********************************************************************************************************************
/*
  					14. Demographic characteristics
*/
********************************************************************************************************************


********************************************************************************************************************
/*
  					15. School characteristics
*/
********************************************************************************************************************
****************************************************
/* 		15.1  Size		 */
****************************************************
	* Counts of non-missing id within each year-1 and year-2 class (school) combination
 * 15.1.1 Class size
	bysort y1_school_class y2_school_class : egen classsize = count(id)

 * 15.1.2 School size
	bysort y1_schoolno y2_schoolno : egen schoolsize = count(id)

****************************************************
/* 		15.2 Class Mean Score		 */
****************************************************
 * 15.2.1 Year 1 Baseline exam
	bysort y1_school_class : egen b1_classmean = mean(b1_z_total)
	
 * 15.2.2 Year 1 Midyear exam
	bysort y1_school_class : egen m1_classmean = mean(m1_z_total)
	
 * 15.2.3 Year 1 Final exam
	bysort y1_school_class : egen f1_classmean = mean(f1_z_total)
	
 * 15.2.4 Year 2 Baseline exam
	bysort y2_school_class : egen b2_classmean = mean(b2_z_math)
	
 * 15.2.5 Year 2 Final exam
	bysort y2_school_class : egen f2_classmean = mean(f2_z_math)
	
****************************************************
/* 		15.3 School Mean Score		 */
****************************************************
 * 15.3.1 Year 1 Baseline exam
	bysort y1_schoolno : egen b1_schoolmean = mean(b1_z_total)
	
 * 15.3.2 Year 1 Midyear exam
	bysort y1_schoolno : egen m1_schoolmean = mean(m1_z_total)
	
 * 15.3.3 Year 1 Final exam
	bysort y1_schoolno : egen f1_schoolmean = mean(f1_z_total)
	
 * 15.3.4 Year 2 Baseline exam
	bysort y2_schoolno : egen b2_schoolmean = mean(b2_z_math)
	
 * 15.3.5 Year 2 Final exam
	bysort y2_schoolno : egen f2_schoolmean = mean(f2_z_math)
	
	 
********************************************************************************************************************
/*
					16. Ordering variables 
*/
********************************************************************************************************************
	* Place the b2_ variables after y2_school_class and the f2_ variables after b2_subpct_math
	order b2_*, a(y2_school_class)
	order f2_*, a(b2_subpct_math)

	*/
 
********************************************************************************************************************
/*
  					17. Labeling variables
*/
********************************************************************************************************************

  * Value label for the three zones (added to any existing definition of "zone")
  label define zone 1 "Balangombe" 2 "Malikha" 3 "Mchemani", add

  * Treatment arms and their interactions with feedback
  label variable merit        "Merit"
  label variable relative     "Relative merit"
  label variable feedback     "Feedback"
  label variable meritfeed    "Merit x Feedback"
  label variable relativefeed "Relative merit x Feedback"

  * Exam participation, scores and ranks.
  * NOTE(review): label text in this block is kept verbatim, including the
  * misspellings in the labels of f1_q213g and understand_scholarship.
  label variable age        "Age"
  label variable b1_e_p     "Baseline exam participation"
  label variable b1_z_total "Baseline score: Total"
  label variable b1_pct     "Baseline rank(%)"
  label variable b1_subpct  "Baseline: Subgroup Rank(%)"
  label variable m1_e_p     "Mid-term exam participation"
  label variable m1_z_total "Mid-term Score"
  label variable m1_pct     "Mid-term Rank(%)"
  label variable m1_subrank "Mid-term Subgroup Rank"
  label variable m1_subpct  "Mid-term Subgroup Rank(%)"
  label variable f1_e_p     "Final exam participation"
  label variable f1_pct     "Final exam rank(%)"
  label variable f1_z_total "Final exam score"

  * Baseline survey variables
  label variable b1_q105_chewa           "Ethnic group: Chewa"
  label variable b1_q105                 "Ethnicity"
  label variable b1_q106                 "Language"
  label variable b1_hhsize               "Size of a household"
  label variable b1_motivation           "Motivation to study hard"
  label variable b1_s9_selfesteem        "Self esteem"
  label variable b1_s9_grit              "Grit Scale"
  label variable b1_s5_asset             "Asset index"
  label variable b1_s9_conscientious     "Conscientiousness"
  label variable b1_q216g4               "School performance(Self evaluated)"
  label variable b1_teacherindex         "Teacher effort Index"
  label variable b1_parentaleffort_index "Parental effort"
  label variable b1_s10_score1           "Raven test score"
  label variable b1_studyhour            "Study hours after class"
  label variable b1_q2202                "Homework completion"

  * Follow-up survey variables
  label variable y1_fuq                    "Follow-up survey participation"
  label variable f1_studyhour_ac           "Study hours after class"
  label variable f1_studyhour_we           "Study hours on weekends"
  label variable f1_q2172                  "Homework completion"
  label variable f1_motivation             "Motivation to study hard"
  label variable f1_s7_selfesteem          "Self esteem"
  label variable f1_s7_grit                "Grit scale"
  label variable f1_s7_conscientious       "Conscientiousness"
  label variable f1_q213g                  "School performance(Self evalauted)"
  label variable f1_teacherindex           "Teacher Effort Index"
  label variable f1_parentaleffort_index   "Parental effort"
  label variable f1_q109                   "Family members' asking to study"
  label variable f1_q503                   "Parents' mentioning scholarship"
  label variable f1_s6_care                "Teacher cares"
  label variable f1_s6_challenge           "Teacher challenges"
  label variable f1_s6_control             "Teacher controls"
  label variable f1_s6_clarify             "Teacher clarifies"
  label variable f1_s6_captivate           "Teacher captivates"
  label variable f1_s6_confer              "Teacher confers"
  label variable f1_s6_consolidate         "Teacher consolidates"
  label variable understand_scholarship    "Understanding schorlarship1"
  label variable f1_understand_scholarship "Understanding scholarship2"
  label variable expect_scholarship2       "Expectation for scholarship1"
  label variable f1_expect_scholarship2    "Expectation for scholarship2"
  label variable f1_s8_score1              "Raven test score"
  label variable f1_s9_score               "Computation test score"
  label variable f1_q110c                  "Father encouraging to study hard"
  label variable f1_q110d                  "Mother encouraging to study hard"
  label variable f1_q110e                  "Parents explaining difficult schoolwork"
  label variable f1_q110f                  "Parents helping study"
  label variable f1_q110g                  "Parents helping homework"
  label variable f1_q110h                  "Parents know school grades"
  label variable f1_q110i                  "Parental positive reinforcement of academic achievement"
  label variable f1_q110j                  "Parental negative reinforcement of academic setback"
  label variable f1_q110k                  "Parents require to get good grade"
  label variable f1_q110l                  "Parental availability for academic help"

  * Attendance
  label variable atd_abs_bam  "Attendance"
  label variable atd_abs_aa0m "Attendance after announcement"

  * Heterogeneity dummies and their interactions with the treatment arms.
  * The merit arm is called "Standard" in these labels. Label text is kept
  * verbatim, including the misspelling in the b1_sub15 label.
  label variable b1_top15   "Baseline top 15%"
  label variable m_b1top15  "Standard x Top 15%"
  label variable r_b1top15  "Relative x Top 15%"
  label variable f_b1top15  "Feedback x Top 15%"
  label variable mf_b1top15 "Standard x Feedback x Top 15%"
  label variable rf_b1top15 "Relative x Feedback x Top 15%"

  label variable b1_sub15   "Basline subgroup Top 15%"
  label variable m_b1sub15  "Standard x Subgroup Top 15%"
  label variable r_b1sub15  "Relative x Subgroup Top 15%"
  label variable f_b1sub15  "Feedback x Subgroup Top 15%"
  label variable mf_b1sub15 "Feedback x Standard x Subgroup Top 15%"
  label variable rf_b1sub15 "Feedback x Relative x Subgroup Top 15%"

  label variable m1_top15   "Mid-term Top 15%"
  label variable m_m1top15  "Standard x Top 15% (midyear)"
  label variable r_m1top15  "Relative x Top 15% (midyear)"
  label variable f_m1top15  "Feedback x Top 15% (midyear)"
  label variable mf_m1top15 "Feedback x Standard x Top 15% (midyear)"
  label variable rf_m1top15 "Feedback x Relative x Top 15% (midyear)"

  label variable m1_sub15   "Mid-term subgroup Top 15%"
  label variable m_m1sub15  "Standard x Subgroup Top 15% (midyear)"
  label variable r_m1sub15  "Relative x Subgroup Top 15% (midyear)"
  label variable f_m1sub15  "Feedback x Subgroup Top 15% (midyear)"
  label variable mf_m1sub15 "Feedback x Standard x Subgroup Top 15% (midyear)"
  label variable rf_m1sub15 "Feedback x Relative x Subgroup Top 15% (midyear)"

  * Second pass over baseline covariates. Most of these variables already
  * received a label earlier in this section; the text given here is the one
  * that ends up in the saved data.
  label variable b1_hhsize              "Household size"
  label variable b1_studyhour           "Study hours after class"
  label variable b1_motivation          "Motivation to study"
  label variable b1_s9_selfesteem       "Self-esteem"
  label variable b1_s9_grit             "Grit"
  label variable b1_s9_conscientious    "Conscientious"
  label variable b1_teacherindex        "Teacher effort index"
  label variable b1_teachereffort_index "Teacher effort index"
  label variable atd_abs_bam            "Attendance"
*	lab var b1_z_math "Baseline score: Math"

  * Year 2 labels, currently disabled
  /*
   	label var y2_control "Control"
	label var tutoronly "Tutor Only school"
	label var scholaronly "Scholarship Only school"
	label var bothschool "Both school"
	label var tutorstudent "Tutored students"
	label var scholarschool "Scholarship school"
	label var b2_z_math "Baseline score"
	label var f2_z_math "Final score"
	label var b2_s_p "Baseline survey participation"
	label var b2_e_p "Baseline exam participation"
	label var f2_e_p "Final exam participation"
	label var y2_register "Year2 Registered students"
	label var y2_subject "Year2 sample"
	label var b2_rank_math "Baeline Exam rank"
	label var f2_rank_math "Followup Exam rank"
	label var y2_subgroup "Subgroup using math score"
	label var f2_subrank_math "Subgroup rank in the final exam"
	label var f2_subpct_math "Percentage subgroup rank"
	label var y2_scholar_award "Year2 Scholarship awardees"
	label var y2_quiz1_4 "Scholarship Understanding(Year2)"
	label var y2_expectation "Scholarship expectation(Year2)"
*/

********************************************************************************************************************
/*
  					18. Compress and save data
*/
********************************************************************************************************************
* Remove leftover variables whose names start with a double underscore.
* capture keeps the do-file from aborting with r(111) when no such variable
* exists in the data.
capture drop __*
* Shrink storage types, then save in an older .dta format
qui compress
saveold "${data_path}/merged_y1y2.dta", replace



